
/*****************************
The Self-Employment Effects of the EITC in the Gig Economy

by Riley Wilson

This do file cleans the ACS data needed to produce results for
"The Self-Employment Effects of the EITC in the Gig Economy"

The main exhibits in the paper are constructed in master.do
*****************************/


/****************************
Clean up the ACS data to create analysis sample
*****************************/
// Set the working directory before running: every path in this file is
// relative to it. The empty string is a placeholder for the user to fill in.
cd "" //Set directory

// Load the ACS extract. Forward slashes are used as the directory separator
// because Stata accepts them on Windows, macOS and Linux, whereas a backslash
// only works as a separator on Windows.
use "data/IPUMS Data/gigwork_acs_data", clear

// cpuma is not referenced anywhere later in this file
drop cpuma

// Household identifier: one id per (year, serial) combination
egen double fam_id = group(year serial)

// Age flags and zero-initialised child counters.
// NOTE(review): both comparisons are inclusive (<= 18 and <= 24) although the
// variables are named "under"; confirm this matches the intended EITC age
// cutoffs before relying on num_ekids24.
generate under18 = (age <= 18)
generate num_ekids = 0
generate under24 = (age <= 24)
generate num_ekids24 = 0

//Identify the number of EITC eligible children
// For every possible person number p (1 through 20): flag the household
// members whose momloc equals p and who pass the age flag, total the flags
// within the household, and store the total on the record whose pernum is p.
// Records that nobody points to keep the initial value of 0.
forvalues p = 1/20 {
	foreach cutoff in 18 24 {
		// under18 feeds num_ekids, under24 feeds num_ekids24
		local target num_ekids
		if `cutoff' == 24 {
			local target num_ekids24
		}
		tempvar kidflag kidcount
		generate `kidflag' = under`cutoff' if momloc == `p'
		bysort fam_id: egen `kidcount' = sum(`kidflag')
		replace `target' = `kidcount' if pernum == `p'
		drop `kidflag' `kidcount'
	}
}

// Move the identifiers and family-structure variables to the front
order year serial famsize momloc poploc sploc nchild relate sex age num_ekids under18

// The location pointers and design variables are not used after this point
drop momloc poploc sploc nsibs sample strata gq

//Taking care of missing values
// Each group of variables shares one all-nines sentinel; the sentinel is
// recoded to 0 so that it contributes nothing when amounts are summed later.
// foreach replaces the out-of-date -for- command, and incsupp is spelled out
// in full so the line does not depend on variable abbreviation being on.
foreach v of varlist hhincome inctot ftotinc {
	replace `v' = 0 if `v' == 9999999
}
foreach v of varlist incinvst incwage incbus00 incretir {
	replace `v' = 0 if `v' == 999999
}
foreach v of varlist incsupp incss incwelfr incother {
	replace `v' = 0 if `v' == 99999
}
foreach v of varlist yngch eldch {
	replace `v' = 0 if `v' == 99
}

// incwage is a six-digit field: 999999 was already set to 0 above, so only
// 999998 can still match here.
replace incwage = . if inlist(incwage, 999999, 999998)

// inctot and valueh are seven-digit fields (inctot is recoded with 9999999
// above), so their special codes are 9999999/9999998. The previous six-digit
// comparison turned genuine values of 999,999 and 999,998 into missing and
// let the seven-digit valueh sentinel survive into the household-level max.
// NOTE(review): codes taken from the IPUMS USA codebook - confirm against the
// codebook shipped with this extract.
replace inctot = . if inlist(inctot, 9999999, 9999998)
replace valueh = . if inlist(valueh, 9999999, 9999998, 0)

//Home ownership
// Indicators for ownershp codes 1 and 2 respectively
generate own_home = (ownershp == 1)
generate rent     = (ownershp == 2)

//Car ownership
// own_car is 1 when vehicles lies in [1, 9); own_1lesscar is 1 when vehicles
// is 0, 1 or 9.
// NOTE(review): presumably 9 codes "no vehicle" and 0 codes N/A - confirm
// against the codebook.
generate own_car      = !(vehicles < 1 | vehicles >= 9)
generate own_1lesscar = (vehicles == 0 | vehicles == 1 | vehicles == 9)


//Race and ethnicity variables
// The three nh_ groups all require hispan == 0; hisp is every hispan value
// other than 0 and 9, so a record with hispan == 9 falls in none of the four.
generate nh_white = (hispan == 0) & (race == 1)
generate nh_black = (hispan == 0) & (race == 2)
generate nh_other = (hispan == 0) & !inlist(race, 1, 2)
generate hisp     = !inlist(hispan, 0, 9)

//Gender variables
generate female = (sex == 2)
generate male   = (sex == 1)

//age variables
// Age-band indicators; ages below 18 fall in none of them
generate age_18_34  = inrange(age, 18, 34)
generate age_35_54  = inrange(age, 35, 54)
generate age_55_64  = inrange(age, 55, 64)
generate age_over64 = (age > 64)

//education
// Attainment bands built from the detailed education code educd:
// <=61, 62-64, above 64 up to 100, 101-113, and 114 or more
generate lshs    = (educd <= 61)
generate hs      = inrange(educd, 62, 64)
generate somcoll = (educd <= 100) & (educd > 64)
generate coll    = inrange(educd, 101, 113)
generate advcoll = (educd >= 114)

//employment/labor force status
// One indicator per empstat code (1, 2, 3)
generate notinlf    = (empstat == 3)
generate employed   = (empstat == 1)
generate unemployed = (empstat == 2)

//health insurance
// Defined only on the head-of-household record (relate == 1) and only from
// 2008 onward (not available prior to 2008); missing everywhere else.
generate hh_healthin = (hcovany == 2) if year >= 2008 & relate == 1

generate hh_pubhin = (hcovpub == 2) if year >= 2008 & relate == 1

//drives car for work
// Indicators for specific occ2010 codes; occ_driverall pools four of them
generate occ_driverall  = (occ2010 == 9100 | occ2010 == 9130 | occ2010 == 9140 | occ2010 == 9150)
generate occ_taxidriver = (occ2010 == 9140)
generate occ_driver     = (occ2010 == 9130)
generate occ_mvo        = (occ2010 == 9150)
// Raw occupation code of the head (relate 1) and of the spouse/partner
// (relate 2 or 11); missing on all other records
generate occ_hh   = occ2010 if relate == 1
generate occ_spar = occ2010 if relate == 2 | relate == 11

// Same layout for industry: one indicator for ind1990 code 402, plus the raw
// code for the head and for the spouse/partner
generate ind_taxi = (ind1990 == 402)
generate ind_hh   = ind1990 if relate == 1
generate ind_spar = ind1990 if relate == 2 | relate == 11
//create indicators for both self employed and work for wages
// Both indicators require empstat == 1; classwkr 1 vs 2 separates them
generate selfemp    = (empstat == 1) & (classwkr == 1)
// Head-only version: missing unless the record is an employed head
generate hh_selfemp = (classwkr == 1) if empstat == 1 & relate == 1
generate wagedemp   = (empstat == 1) & (classwkr == 2)
// Positive incbus00; left missing when incbus00 is system missing
generate have_selfempinc = (incbus00 > 0) if incbus00 != .
// 1 if any member of the household has positive incbus00
bysort fam_id: egen h_have_selfempinc = max(have_selfempinc)
//create marital status variables
generate married     = (marst == 1 | marst == 2)
generate divsepwidow = (marst == 3 | marst == 4 | marst == 5) //separated, divorced, or widowed
generate nevmar      = (marst == 6)

//head of household characteristics
// Role indicators from relate: 1 = head, 2 = spouse, 11 = partner
generate head    = (relate == 1)
generate spouse  = (relate == 2)
generate partner = (relate == 11)

// For each person-level indicator create hh_<name>, equal to 1 only on the
// head's record when the head has that trait and 0 on every other record.
// The list order fixes the order in which the variables are created.
foreach trait in male female ///
		married divsepwidow nevmar ///
		lshs hs somcoll coll advcoll ///
		age_18_34 age_35_54 age_55_64 age_over64 ///
		nh_white nh_black nh_other hisp {
	generate hh_`trait' = head & `trait'
}

//identify if grandchild in the home
// gchild marks records with relate == 9; gchild_inhome spreads that flag to
// every record sharing the same year and serial
generate gchild = (relate == 9)
bysort year serial: egen gchild_inhome = max(gchild)

//identify movers
// Each indicator is built from the migrate1d code and is left missing when
// migrate1d == 0.
// NOTE(review): 0 is presumably the N/A code - confirm against the codebook.
generate moved_any         = inrange(migrate1d, 20, 40)   if migrate1d != 0
generate moved_migpuma     = inrange(migrate1d, 24, 40)   if migrate1d != 0
generate moved_migpumainst = inrange(migrate1d, 24, 25)   if migrate1d != 0
generate moved_states      = inlist(migrate1d, 31, 32, 40) if migrate1d != 0

// Moved, and the current metro code differs from the metro code one year
// ago; missing whenever moved_any is missing
generate diff_msa1yr = (moved_any == 1 & met2013 ~= migmet131) if moved_any != .

// Head-of-household versions. The earlier form, head & `v', evaluated a
// missing `v' as true (Stata treats missing as nonzero), so a head with
// unknown migration status was coded as a mover. Copying the head's own
// value keeps missing as missing; non-head records are missing, so the later
// household-level max returns exactly the head's value.
foreach v in moved_any moved_migpuma moved_migpumainst moved_states diff_msa1yr {
	generate hh_`v' = `v' if head == 1
}

//keep head of household and any spouse or partner of head of hh
drop if !(head == 1 | spouse == 1 | partner == 1)

// collapsing data to the household level
// One row per (year, serial). Everything in the (max) list takes the largest
// value across the remaining head/spouse/partner records; the five income
// amounts in the (sum) list are added up across them. The variable order is
// kept exactly as before; the occ_ and ind_ wildcard patterns pick up every
// variable with those prefixes.
gcollapse ///
	(max) num_ekids num_ekids24 foodstmp hhincome eldch trantime famsize nchild ///
	cbserial hhwt statefip countyfip metro met2013 city puma valueh ///
	hh_nh_white hh_nh_black hh_nh_other hh_hisp ///
	hh_male hh_female ///
	hh_married hh_divsepwidow hh_nevmar ///
	hh_lshs hh_hs hh_somcoll hh_coll hh_advcoll ///
	notinlf employed unemployed ///
	hh_age_18_34 hh_age_35_54 hh_age_55_64 hh_age_over64 ///
	cluster educ wkswork2 uhrswork ftotinc poverty pwmet13 fam_id ///
	own_home rent occ_* ind_* yngch ///
	have_selfempinc h_have_selfempinc selfemp hh_selfemp wagedemp ///
	own_car own_1lesscar vehicles qvehicle ///
	hh_healthin hh_pubhin gchild_inhome ///
	moved_any moved_migpuma moved_migpumainst moved_states diff_msa1yr ///
	hh_moved_any hh_moved_states hh_moved_migpuma hh_moved_migpumainst hh_diff_msa1yr ///
	(sum) inctot incwage incinvst incsupp incbus00, ///
	by(year serial)

//Rename these to more accurately reflect what they are: employment vars for either head of hh or spouse/partner
rename (notinlf employed unemployed) (anynotinlf anyemployed anyunemployed)

// met2013 doubles as the merge key for the Uber rollout file
gen cbsa_id = met2013

**Merging Uber entry data
// Forward slashes keep the path usable on every operating system
merge m:1 cbsa_id using "data/rideshare_rollout/uber_entry_merged_cleaned"

//Get the number of dependents to merge on to the EITC data (WI has different rules)
gen numdepends_WI = 0
replace numdepends_WI = num_ekids if statefip == 55

// Child count used as the EITC merge key, capped at 3
gen num_echild = min(num_ekids, 3)

// dropping these datapoints because these are cbsas which are too small to be identified in the ACS data
// (_merge == 2: rows that exist only in the Uber file). _merge is written out
// in full so the line does not depend on variable abbreviation being on.
drop if _merge == 2
// dropping these datapoints since they are from cities not within our Uber data
// drop if _merge == 1

// After the drop above only two values remain:
//   1 = household's cbsa_id was not found in the Uber file (master only)
//   3 = household's cbsa_id was matched in the Uber file
rename _merge merged_cbsa_flag

//Merging federal EITC data
// keep(master match) discards rows found only in the EITC file (Fed EITC data
// years not relevant to our data) and nogenerate skips creating _merge, which
// replaces the separate drop steps that relied on the _m abbreviation.
// Forward slashes keep the path usable on every operating system.
merge m:1 year num_echild using "data/federal_eitcparm_bychildren1975_2021.dta", keep(master match) nogenerate

rename statefip stfips

//Merging state EITC data
/*year statefips num_echild*/
// Rows found only in the state file are discarded because these are years
// outside of the range of our uber data.
merge m:1 year stfips num_echild using "data/State EITC Policy/state_eitc_yr_numechild", keep(master match) nogenerate

// Unmatched households are kept because these are years/states where no eitc
// policy existed at the state level, so we replace these missing values with 0
replace eitcpct = 0 if eitcpct == .
rename eitcpct state_eitcpct
// 1 when the state rate is positive, 0 otherwise
gen has_eitc = state_eitcpct > 0

//Create variable for the federal credit + state credit
// The state rate is applied as a percentage top-up on maxcredit
gen max_total_credit = (1+(state_eitcpct/100))*maxcredit

order year state serial famsize DateofEntry max_total_credit

//Create variables for areas that got Uber
// 1 from the calendar year of DateofEntry onward. When DateofEntry is missing
// the comparison is false, so those rows stay at 0.
gen uber_exposed = year >= year(DateofEntry)

// Share of the year with Uber available: 0 before the entry year, 1 after
// it, and a fraction in the entry year itself.
// The entry-year fraction was previously month(DateofEntry)/12, which is the
// share of the year elapsed up to entry (a December entry gave 1, a January
// entry 1/12). It now counts the months from the entry month through
// December, treating the entry month as exposed.
// NOTE(review): this changes a constructed research variable - confirm the
// intended definition before re-running estimates that use it.
gen uber_exposed_partial = 0
replace uber_exposed_partial = 1 if year > year(DateofEntry)
replace uber_exposed_partial = (13 - month(DateofEntry))/12 if year == year(DateofEntry)

//adjusting $$ for inflation
// Attach the yearly price index; rows found only in the index file are
// discarded and no _merge variable is left behind. Forward slashes keep the
// path usable on every operating system.
merge m:1 year using "data/pce_inflator2020", keep(master match) nogenerate

// Create a <name>2020 copy of each dollar amount, scaled by pceindex2020.
// foreach replaces the out-of-date -for- command.
foreach v of varlist hhincome inctot ftotinc incwage incinvst incbus00 incsupp max_total_credit {
	gen `v'2020 = `v'*pceindex2020
}

// Express the maximum credit in thousands, then interact it with Uber exposure
replace max_total_credit2020 = max_total_credit2020/1000
gen max_total_credit2020_uber = max_total_credit2020*uber_exposed

//update CBSA definition
// Keep the original code as cbsa_u and build a new id that is unique per
// (met2013, stfips) pair
rename cbsa_id cbsa_u
egen cbsa_id = group(met2013 stfips)

// State-by-year group id
egen st_yr = group(stfips year)

// Head of household falls in the lshs or hs education band
gen head_hsorless = hh_lshs == 1 | hh_hs == 1

// Save the full household file, then the subsample of heads with high school
// or less. Forward slashes keep the paths usable on every operating system.
compress
save "data/household_ubereitc2005_2019", replace
keep if head_hsorless == 1
compress
save "data/hslesshousehold_ubereitc2005_2019", replace
